######## Macrotolerance and protest, metro-area analysis and models

library(sandwich)
library(arm)
library(RColorBrewer)
library(ggplot2)
library(choroplethrZip)
library(choroplethr)
library(texreg)
library(dplyr)

zero2one = function (varname){ varname = (varname - min(varname, na.rm = TRUE)) / 
                                (max(varname, na.rm = TRUE) - min(varname, na.rm = TRUE)) }

par(mfrow=c(1,1), mar=c(3,3,1,.5), cex=0.9, mgp=c(1.5,.5,0), tcl=-.25, las=1)

# read data
metro = read.csv("metro_data_rep.csv")
tol = read.csv("tol_msa_est_irt.csv")
gdelt = read.csv("GDELT/gdelt_bymsa_0711_3vers.csv")
gdelt06 = read.csv("GDELT/gdelt_bymsa_0206_2vers.csv")
msa_codes = read.csv("msa_2008_codes.csv")
msa.ideo = read.csv("mrp_msa_ideology.csv")

# merge
metdat1 = merge(metro, tol, by="MSACode", all.x=TRUE)
metdat2 = merge(metdat1, gdelt, by.x="MSACode", by.y="MSA_code", all.x=TRUE)
metdat3 = merge(metdat2, gdelt06, by.x="MSACode", by.y="MSA_code", all.x=TRUE)
metdat = merge(metdat3, msa.ideo, by="MSACode", all=TRUE)
metdat[, c("MSAName.x", "MetroName")] = NULL
names(metdat)[names(metdat)=="MSAName.y"] = "MSAName"

summary(metdat)
round(sapply(metdat, sd, na.rm=TRUE), 2)


## Recode data

# remove DC
metdat = metdat[metdat$MSACode!=47900,]

# recode missing protest
metdat$MSAName[is.na(metdat$GdeltMSARaw)]
metdat$MSAName[is.na(metdat$GdeltMSA4Filter)]
metdat$MSAName[is.na(metdat$Gdelt0206MSARaw)]
metdat$GdeltMSARaw = ifelse(is.na(metdat$GdeltMSARaw), 0, metdat$GdeltMSARaw)
metdat$GdeltMSA4Filter = ifelse(is.na(metdat$GdeltMSA4Filter), 0, metdat$GdeltMSA4Filter)
metdat$GdeltMSA5Filter = ifelse(is.na(metdat$GdeltMSA5Filter), 0, metdat$GdeltMSA5Filter)
metdat$Gdelt0206MSARaw = ifelse(is.na(metdat$Gdelt0206MSARaw), 0, metdat$Gdelt0206MSARaw)
metdat$Gdelt0206MSA4Filter = ifelse(is.na(metdat$Gdelt0206MSA4Filter), 0, metdat$Gdelt0206MSA4Filter)

# create new variables
metdat$LnPop10 = log(metdat$Pop10/1e6)
metdat$LnPop00 = log(metdat$Pop00/1e6)
metdat$LnPop0711 = log(metdat$Pop0711/1e6)

metdat$RatProtRaw = metdat$GdeltMSARaw/5/(metdat$Pop0711/1e6)
metdat$RatProt4F = metdat$GdeltMSA4Filter/5/(metdat$Pop0711/1e6)
metdat$RatProt5F = metdat$GdeltMSA5Filter/5/(metdat$Pop0711/1e6)

sort(metdat$RatProt4F)
summary(metdat$RatProt4F)
plot(x=metdat$RatProt4F, y=log(metdat$RatProt4F+3))

metdat$LnRatProtRaw = log(metdat$RatProtRaw+1)
metdat$LnRatProt4F = log(metdat$RatProt4F+1)
metdat$LnRatProt5F = log(metdat$RatProt5F+1)
metdat$RatProt0206Raw = metdat$Gdelt0206MSARaw/5/(metdat$Pop00/1e6)
metdat$RatProt02064F = metdat$Gdelt0206MSA4Filter/5/(metdat$Pop00/1e6)
metdat$LnRatProt0206Raw = log(metdat$RatProt0206Raw+1)
metdat$LnRatProt02064F = log(metdat$RatProt02064F+1)

metdat$MedInc10k = metdat$MedInc0711/10000
metdat$PcChgPop = (metdat$Pop10-metdat$Pop00)/metdat$Pop00
metdat$RelCongP1k = (metdat$RelCong10/metdat$Pop10)*1000
metdat$VolOrgsP1m = metdat$VolOrgsPC/100
metdat$South = ifelse(metdat$MSARegion=="S", 1, 0)
metdat$Turnout12 = metdat$TotVot12/metdat$PopOv18

# z-standardize
metdatz = metdat
metdatz = data.frame(sapply(metdatz, as.numeric))
metdatz = data.frame(sapply(metdatz, scale))
metdatz[,c("MSACode", "MSANum", "MSAStt", "MSAStt1", "MSARegion", "MSAName", "GdeltMSA4Filter", 
           "GdeltMSA5Filter", "GdeltMSARaw", "LnPop0711")] = 
  metdat[,c("MSACode", "MSANum", "MSAStt", "MSAStt1", "MSARegion", "MSAName", "GdeltMSA4Filter",
            "GdeltMSA5Filter", "GdeltMSARaw", "LnPop0711")]

# 0-1 standardize
metdat01 = metdat
metdat01 = data.frame(sapply(metdat01, as.numeric))
metdat01 = data.frame(sapply(metdat01, zero2one))
metdat01[,c("MSACode", "MSANum", "MSAStt", "MSAStt1", "MSARegion", "MSAName", "GdeltMSA4Filter", 
            "GdeltMSA5Filter", "GdeltMSARaw", "LnPop0711")] = 
  metdat[,c("MSACode", "MSANum", "MSAStt", "MSAStt1", "MSARegion", "MSAName", "GdeltMSA4Filter",
            "GdeltMSA5Filter", "GdeltMSARaw", "LnPop0711")]


## Examine

# sort protest counts
metdat$MSAName[order(metdat$GdeltMSARaw, decreasing=TRUE)]
metdat[order(metdat$GdeltMSARaw, decreasing=TRUE), c("MSAName", "GdeltMSARaw")]
metdat[order(metdat$GdeltMSA4Filter, decreasing=TRUE), c("MSAName", "GdeltMSA4Filter")]
head(metdat[order(metdat$GdeltMSA4Filter, decreasing=TRUE), c("MSAName", "GdeltMSA4Filter")], 10)
tail(metdat[order(metdat$GdeltMSA4Filter, decreasing=TRUE), c("MSAName", "GdeltMSA4Filter")], 10)
tail(metdat[order(metdat$GdeltMSARaw, decreasing=TRUE), c("MSAName", "GdeltMSARaw")], 15)
hist(metdat$GdeltMSA4Filter, col="grey", breaks=c(-1,seq(100,1600,by=100)))

# sort protest rates
metdat$MSAName[order(metdat$RatProt4F, decreasing=TRUE)]
metdat[order(metdat$RatProt4F, decreasing=TRUE), c("MSAName", "RatProt4F")]
head(metdat[order(metdat$RatProt4F, decreasing=TRUE), c("MSAName", "RatProt4F")], 10)
tail(metdat[order(metdat$RatProt4F, decreasing=TRUE), c("MSAName", "RatProt4F")], 10)
hist(metdat$RatProt4F, col="grey")
hist(metdat$LnRatProt4F, col="grey")

# sort tolerance
metdat$MSAName[order(metdat$GenTolMSA, decreasing=TRUE)]
metdat[order(metdat$GenTolMSA, decreasing=TRUE), c("MSAName", "GenTolMSA")]
head(metdat01[order(metdat01$GenTolMSA, decreasing=TRUE), c("MSAName", "GenTolMSA")], 10)
tail(metdat01[order(metdat01$GenTolMSA, decreasing=TRUE), c("MSAName", "GenTolMSA")], 10)
hist(metdat01$GenTolMSA, col="grey")

# all
write.csv(metdat[c("MSAName", "GdeltMSA4Filter", "RatProt4F", "GenTolMSA")], "tol_prot_table1.csv")


## Descriptives

met.var = c("GdeltMSA4Filter", "LnPop0711", "GenTolMSA", "ObamaPc12", "Gini", "DissWhBl10", 
             "EthFrac10", "PcChgPop", "PcUnemp0711", "MedInc10k", "PcStud", "PcUnd18", 
             "PcHSDip", "VolOrgsP1m", "RelCongP1k")

cbind(sapply(metdat01[,met.var], min, na.rm=TRUE), sapply(metdat01[,met.var], max, na.rm=TRUE),
      sapply(metdat01[,met.var], mean, na.rm=TRUE), sapply(metdat01[,met.var], sd, na.rm=TRUE))


## Distribution plots

hist(metdat$GdeltMSA4Filter, col="grey")
hist(metdat$Gdelt0206MSA4Filter, col="grey")
hist(metdat$RatProt4F, col="grey")
hist(metdat$LnRatProt4F, col="grey")
hist(metdat$LnRatProt02064F, col="grey")


## Choropleths

# Tolerance
dattol = metdat01[, c("MSACode", "GenTolMSA")]
tolcounty = merge(msa_codes, dattol, by.x="CBSA_Code", by.y="MSACode", all=TRUE)
tolcounty = tolcounty[complete.cases(tolcounty$County_FIPS), c("County_FIPS", "GenTolMSA")]
names(tolcounty) = c("region", "value")

pdf("protest_map_msa.pdf", width=8, height=4)
choro = CountyChoropleth$new(tolcounty)
choro$title = ''
choro$set_num_colors(4)
map.pal = brewer.pal(n=9, name="YlGnBu")[c(3,5,7,9)]
map.pal = brewer.pal(n=9, name="Reds")[c(3,5,7,9)]
choro$ggplot_scale = scale_fill_manual(values=map.pal, name="Macro-tolerance", na.value="white",
                                       labels=c("lowest quartile", "2nd quartile", "3rd quartile",
                                                "highest quartile", ""))
choro$ggplot_polygon = geom_polygon(aes(fill=value), colour=NA)
choro$render()
dev.off()

# Protest
datprot = metdat[, c("MSACode", "RatProt4F")]
protcounty = merge(msa_codes, datprot, by.x="CBSA_Code", by.y="MSACode", all=TRUE)
protcounty = protcounty[complete.cases(protcounty$County_FIPS), c("County_FIPS", "RatProt4F")]
names(protcounty) = c("region", "value")

pdf("protest_map_msa.pdf", width=8, height=4)
choro = CountyChoropleth$new(protcounty)
choro$title = ''
choro$set_num_colors(4)
choro$ggplot_polygon = geom_polygon(aes(fill=value), color=NA)
choro$ggplot_scale = scale_fill_manual(values=map.pal, name="Rate of protest", na.value="white",
                                       labels=c("lowest quartile", "2nd quartile", "3rd quartile",
                                                "highest quartile", ""))
choro$render()
dev.off()


## Scatterplots

pdf("tolerance_deg_protest_plot.pdf", width=6, height=6)
sp = ggplot(data=metdat01, aes(x=GenTolMSA, y=LnRatProt4F)) + 
  geom_point(size=1.5, alpha=0.5, color="black") +
  geom_smooth(method=lm, level=FALSE, size=0.8, color="black", linetype="solid") +
  theme_bw() +
  xlab("Macro-tolerance") + 
  ylab("Log Rate of Protest") 
sp
dev.off()

cor(metdat01$GenTolMSA, metdat01$LnRatProt4F, "pair")

plot(metdat$PcBachDeg, metdat$GenTolMSA, pch=16, col=rgb(0,0,0,0.5))
plot(metdat$PcRelig10, metdat$GenTolMSA, pch=16, col=rgb(0,0,0,0.5))
plot(metdat$PcBl10, metdat$GenTolMSA, pch=16, col=rgb(0,0,0,0.5))
plot(metdat$PcBachDeg, metdat$LnRatProt4F, pch=16, col=rgb(0,0,0,0.5))

plot(metdat$GenTolMSA, metdat$LnRatProt4F, pch=16, col=rgb(0,0,0,0.5))
plot(metdat$GenTolMSA, metdat$LnRatProt02064F, pch=16, col=rgb(0,0,0,0.5))
plot(resid(lm(PcBachDeg~GenTolMSA, metdat)), metdat$LnRatProt4F, pch=16, col=rgb(0,0,0,0.5))


## correlations

mod.cors = cor(
  metdat[, c("LnRatProt4F","GenTolMSA","ObamaPc12","Gini","DissWhBl10","EthFrac10","PcChgPop",
             "PcUnemp0711","MedInc10k","PcUnd18","PcStud","PcHSDip","VolOrgsP1m","RelCongP1k")], 
  use="pair")
write.csv(mod.cors, "tolerance_protest_model_correlations.csv")


## Negative binomial models of protest

gen.count = GdeltMSA4Filter ~ offset(LnPop0711) + GenTolMSA + ObamaPc12 + Gini + DissWhBl10 + EthFrac10 + PcChgPop + PcUnemp0711 + MedInc10k + PcStud + PcUnd18 + PcHSDip + VolOrgsP1m + RelCongP1k

nb1 = glm.nb(gen.count, metdat01)
nb2 = glm.nb(update(gen.count, .~.-offset(LnPop0711)+LnPop0711+Gdelt0206MSA4Filter), metdat01)
nb3 = glm.nb(update(gen.count, GdeltMSARaw~.), metdat01)
nb4 = glm.nb(update(gen.count, .~.+MRPIdeol-ObamaPc12), metdat01)
nb5 = glm.nb(update(gen.count, .~.-EthFrac10+PcWh10), metdat01)
nb6 = glm.nb(update(gen.count, .~.), metdat01[-c(55,149,271),])

screenreg(list(nb1, nb2, nb3, nb4, nb5, nb6), single=FALSE, leading.zero=FALSE, digits=2)


## Placebo: models of turnout

turn.form = Turnout12 ~ GenTolMSA + ObamaPc12 + Gini + DissWhBl10 + EthFrac10 + PcChgPop + PcUnemp0711 + MedInc10k + PcStud + PcUnd18 + PcHSDip + VolOrgsP1m + RelCongP1k

lm1 = lm(turn.form, metdat01)
lm1.rse = sqrt(diag(sandwich(lm1)))
lm1.p = pnorm(abs(coef(lm1)/lm1.rse), lower=FALSE)*2

screenreg(list(lm1), override.se=list(lm1.rse), override.pvalues=list(lm1.p), single=TRUE, leading=FALSE)


## Effect sizes

sapply(metdat01[, c("GenTolMSA", "LnPop0711", "GdeltMSA4Filter")], summary)
sapply(metdat01[, c("GenTolMSA", "LnPop0711", "GdeltMSA4Filter")], sd)

pred.dat.sm.lo = data.frame(
  LnPop0711=-2.2039, GenTolMSA=0.2245, ObamaPc12=mean(metdat01$ObamaPc12), Gini=mean(metdat01$Gini),
  DissWhBl10=mean(metdat01$DissWhBl10), EthFrac10=mean(metdat01$EthFrac10), 
  PcChgPop=mean(metdat01$PcChgPop), PcUnemp0711=mean(metdat01$PcUnemp0711), 
  PcStud=mean(metdat01$PcStud), MedInc10k=mean(metdat01$MedInc10k), PcHSDip=mean(metdat01$PcHSDip),
  PcUnd18=mean(metdat01$PcUnd18), VolOrgsP1m=mean(metdat01$VolOrgsP1m), 
  RelCongP1k=mean(metdat01$RelCongP1k))

pred.dat.sm.hi = data.frame(
  LnPop0711=-2.2039, GenTolMSA=0.5673, ObamaPc12=mean(metdat01$ObamaPc12), Gini=mean(metdat01$Gini),
  DissWhBl10=mean(metdat01$DissWhBl10), EthFrac10=mean(metdat01$EthFrac10), 
  PcChgPop=mean(metdat01$PcChgPop), PcUnemp0711=mean(metdat01$PcUnemp0711), 
  PcStud=mean(metdat01$PcStud), MedInc10k=mean(metdat01$MedInc10k), PcHSDip=mean(metdat01$PcHSDip),
  PcUnd18=mean(metdat01$PcUnd18), VolOrgsP1m=mean(metdat01$VolOrgsP1m), 
  RelCongP1k=mean(metdat01$RelCongP1k))

pred.dat.lg.lo = data.frame(
  LnPop0711=-0.1001, GenTolMSA=0.2245, ObamaPc12=mean(metdat01$ObamaPc12), Gini=mean(metdat01$Gini),
  DissWhBl10=mean(metdat01$DissWhBl10), EthFrac10=mean(metdat01$EthFrac10), 
  PcChgPop=mean(metdat01$PcChgPop), PcUnemp0711=mean(metdat01$PcUnemp0711), 
  PcStud=mean(metdat01$PcStud), MedInc10k=mean(metdat01$MedInc10k), PcHSDip=mean(metdat01$PcHSDip),
  PcUnd18=mean(metdat01$PcUnd18), VolOrgsP1m=mean(metdat01$VolOrgsP1m), 
  RelCongP1k=mean(metdat01$RelCongP1k))

pred.dat.lg.hi = data.frame(
  LnPop0711=-0.1001, GenTolMSA=0.5673, ObamaPc12=mean(metdat01$ObamaPc12), Gini=mean(metdat01$Gini),
  DissWhBl10=mean(metdat01$DissWhBl10), EthFrac10=mean(metdat01$EthFrac10), 
  PcChgPop=mean(metdat01$PcChgPop), PcUnemp0711=mean(metdat01$PcUnemp0711), 
  PcStud=mean(metdat01$PcStud), MedInc10k=mean(metdat01$MedInc10k), PcHSDip=mean(metdat01$PcHSDip),
  PcUnd18=mean(metdat01$PcUnd18), VolOrgsP1m=mean(metdat01$VolOrgsP1m), 
  RelCongP1k=mean(metdat01$RelCongP1k))

predict(nb1, pred.dat.sm.lo, "response") / 5
predict(nb1, pred.dat.sm.hi, "response") / 5
predict(nb1, pred.dat.lg.lo, "response") / 5
predict(nb1, pred.dat.lg.hi, "response") / 5

metdat[order(metdat$LnPop10), c("MSAName", "LnPop10")]


# predicted effects plots

x.sim = seq(0,1,by=0.01)

pred.dat.sm = data.frame(
  LnPop0711=-2.2039, GenTolMSA=x.sim, ObamaPc12=mean(metdat01$ObamaPc12), 
  Gini=mean(metdat01$Gini), DissWhBl10=mean(metdat01$DissWhBl10), EthFrac10=mean(metdat01$EthFrac10), 
  PcChgPop=mean(metdat01$PcChgPop), PcUnemp0711=mean(metdat01$PcUnemp0711), 
  PcStud=mean(metdat01$PcStud), MedInc10k=mean(metdat01$MedInc10k), PcHSDip=mean(metdat01$PcHSDip),
  PcUnd18=mean(metdat01$PcUnd18), VolOrgsP1m=mean(metdat01$VolOrgsP1m), 
  RelCongP1k=mean(metdat01$RelCongP1k))

pred.dat.lg = data.frame(
  LnPop0711=-0.1001, GenTolMSA=x.sim, ObamaPc12=mean(metdat01$ObamaPc12), 
  Gini=mean(metdat01$Gini), DissWhBl10=mean(metdat01$DissWhBl10), EthFrac10=mean(metdat01$EthFrac10), 
  PcChgPop=mean(metdat01$PcChgPop), PcUnemp0711=mean(metdat01$PcUnemp0711), 
  PcStud=mean(metdat01$PcStud), MedInc10k=mean(metdat01$MedInc10k), PcHSDip=mean(metdat01$PcHSDip),
  PcUnd18=mean(metdat01$PcUnd18), VolOrgsP1m=mean(metdat01$VolOrgsP1m), 
  RelCongP1k=mean(metdat01$RelCongP1k))

pred.eff.sm = predict(nb1, pred.dat.sm, "response", se.fit=TRUE)
pred.eff.lg = predict(nb1, pred.dat.lg, "response", se.fit=TRUE)

pdf("pred_effects_plot.pdf", width=6, height=6)

par(mfrow=c(1,1), mar=c(2.5, 2.5, .5, .5), cex=1, tcl=-.25)
plot(x=x.sim, y=(pred.eff.lg$fit)/5, type="l", ylab="", xlab="", 
     ylim=c(0,48), xaxt="n", yaxt="n", lwd=2)
lines(x=x.sim, y=(pred.eff.sm$fit)/5, col=grey(0.3), lwd=2, lty=2)
polygon(x=c(x.sim,x.sim[101:1]), col=rgb(0,0,0,.4), border=NA,
        y=c((pred.eff.lg$fit+1.96*pred.eff.lg$se.fit)/5, 
            (pred.eff.lg$fit[101:1]-1.96*pred.eff.lg$se.fit[101:1])/5))
abline(h=c(5, 10, 15, 20, 25, 30, 35, 40, 45), lwd=0.75, lty=3, col=rgb(0,0,0,0.6))
abline(h=c(0), lwd=0.75, lty=1, col=rgb(0,0,0,0.6))
polygon(x=c(x.sim,x.sim[101:1]), col=rgb(0,0,0,.3), border=NA,
        y=c((pred.eff.sm$fit+1.96*pred.eff.sm$se.fit)/5, 
            (pred.eff.sm$fit[101:1]-1.96*pred.eff.sm$se.fit[101:1])/5))
axis(side=1, at=seq(0,1,by=0.2), mgp=c(1.3, 0.2, 0), cex.axis=0.75)
axis(side=2, mgp=c(1.5, .5, 0), cex.axis=0.75)
mtext("Macro-tolerance", side=1, line=1.2)
mtext("Predicted annual number of protests", side=2, line=1.5, las=0)
text(x=1.02, y=31, lab="MSA is 1 std. dev. larger than mean", pos=2, cex=0.75)
text(x=0.55, y=2.2, lab="MSA is 1 std. dev. smaller than mean", pos=2, cex=0.75, col=grey(0.3))
rug(metdat01$GenTolMSA, col=rgb(0,0,0,0.5), lwd=0.75)

dev.off()


